home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SPACE 2
/
SPACE - Library 2 - Volume 1.iso
/
utility
/
533
/
kwic
/
kwic.pas
< prev
next >
Wrap
Pascal/Delphi Source File
|
1991-07-02
|
21KB
|
598 lines
{$D+,R+,P+,C+,U10} {Compiled via command line}
{Project #359. Key words in context.}
PROGRAM KWIC;
{$I GEMSUBS.PAS}
{$I AUXSUBS.PAS}
{$I CURRENT\KWIC.I}
(*
Program : Key Words in Context
Version : 1.0
Language : Personal Pascal 2.05
Resource : K-Resource by Kuma
Written by : Merlin L. Hanson
Date : July 1991
*** Pseudocode ***
Read the input data file
Read the file of words to be excluded
for all lines do
remember where each line starts
for all words do
if word is not in the exclusion list, add the word to outlist
sort the list
print the entries
Note: outlist is a 2-tuple: word, line number in the file.
exclusion list is a list of the articles, prepositions, and any
ad-hoc words the user wishes to add.
For speed, put the 'bad words' into a linked list with the bases
in an array corresponding to 'a'..'z'.
*** end *** of pseudocode *)
(*CONST actual declaration is in file KWIC.I *)
VAR
Printer : {FILE OF}text;
RSC_Name : path_name;
{ *** Subprograms of a utility nature are listed ***
prior to the main procedure (DoIt) }
PROCEDURE Abort(s:string);
BEGIN{abort}
WriteLn(s);
WriteLn('Press return');
ReadLn;
HALT;
END{abort};
{Returns TRUE if there is a character in the keyboard buffer.
Follow with ReadLn (not Read) to 'eat' the character.}
FUNCTION KeyPressed:boolean;
GEMDOS($0B); {aka CONSTAT, aka cconis}
{If any key has been pressed, wait for press of [Return].
Then return control to the caller. Unfortunately, the key pressed
will be displayed on the monitor.}
PROCEDURE Pause;
BEGIN {pause}
IF KeyPressed
THEN ReadLn;
END{pause};
{Empty the keyboard buffer so that user and the program are in synch
with each other.}
PROCEDURE DrainKbd;
BEGIN
WHILE KeyPressed DO
ReadLn; {To that big bit-bucket in the sky.}
END{drainkbd};
{ *** Get File Name ***
Display 'Note' above the file selector box. Display 'Current' drive
and path for path name. Display 's' as suggested file name.
Return 's' as a full drive/path/file name.}
PROCEDURE GetFileName(Note:string; VAR s:path_name);
VAR MyPath:path_name; x_coord:integer;
PROCEDURE GetDriveAndPath(VAR S:string);
{ A procedure that returns a Pascal string containing the current drive,
current path and all punctuation. Simply append a raw file name.}
VAR
i : integer;
T : string;
Path : string;
DriveString : string;
FUNCTION CurrentDisk:integer;
{Returns an integer specifying the current drive. 0 specifies A, etc.}
GEMDOS($19);
PROCEDURE GetDir(VAR Ptr:string; DriveID:integer);
{Puts a C string defining the folders currently open on DriveID
into S. DriveID of 0 specifies the current drive.}
GEMDOS($47);
BEGIN {getdriveandpath}
DriveString := CONCAT(CHR(ORD('A')+CurrentDisk),':');
GetDir(Path,0);
{Convert from C to Pascal.}
i := 0;
WHILE Path[i] <> CHR(0) DO
BEGIN
T[i+1] := Path[i];
i := i+1;
END;
{Set the length}
T[0] := CHR(i);
S := CONCAT(DriveString,T,'\');
END {getdriveandpath};
BEGIN{getfilename}
x_coord := 40 - (LENGTH(Note) DIV 2);
GOTOXY(x_coord,1);
Write(#27,'p'); {invert color}
Write(Note);
Write(#27,'q'); {normal color}
GetDriveAndPath(MyPath);
IF GET_IN_FILE(MyPath,s)
THEN { }
ELSE HALT;
END{getfilename};
{Returns the lower case equivalent of an upper case character.}
FUNCTION LowerCase(ch:char):char;
BEGIN
IF (ch >= 'A') AND (ch <= 'Z')
THEN LowerCase := CHR(ORD(ch)+32)
ELSE LowerCase := ch;
END {lowercase};
PROCEDURE DoDither;
VAR w,h:integer;
FUNCTION GetRez:integer;
XBIOS(4);
BEGIN{dodither}
Hide_Mouse;
CASE GetRez OF
0 : BEGIN w := 320; h := 200; END;
1 : BEGIN w := 640; h := 200; END;
2 : BEGIN w := 640; h := 400; END;
END{case};
Paint_Style(5);
Paint_Rect(0,0,w,h);
Show_Mouse;
END{dodither};
(**************************
*** Principal Procedure ***
**************************)
PROCEDURE DoIt; (******************)
TYPE
s254 = string[254];
s11 = string[11]; {max length of a key word}
T158 = integer; {because the sort was a 'stock' item and was paramaterized}
T383 = s11;
T857 = PACKED ARRAY[1..110000]OF char; {max size of input file}
T062 = 1..8000; {max number of key words}
T141 = ARRAY[T062] OF s11;
{*** Following used by procedure ReadBad ***}
T269 = ^T838;
T838 = RECORD
KeyWd : s11; {Word to be ignored}
Link : T269;
END{record};
T560 = ARRAY[0..25] OF T269; {for a,b,c, .. ,z}
VAR
Arr : T857; {Character array copy of text file.}
inix : long_integer; {always index for 'Arr'}
NbrBytes : long_integer; {number of bytes in the input file}
Leftmost : integer; {key words are left justified to this column.}
i,LineNbr : integer;
Letter : SET OF char;
junk : char;
junki : integer;
{The next 2 should properly be a single record, but the sort routine
(pre-built) sorts arrays of a rather simple kind;
not arrays of records. The index is simply the order of acquisiton
of the word from the original text file.}
KeyWdArr : {ARRAY[T062] OF s11} T141; {list of key words.}
TheLine : ARRAY[T062] OF integer; {line nbr associated
with the key word.}
NextIx : integer; {index for above 2}
(*** end of 'parallel' variables ***)
FirstCh : ARRAY[1..3600] OF long_integer;
{FirstCh[j] contains the
inix value of the first char of line j.
The input file can have up to 3600 lines.}
NextFirst : integer; {allocation index for FirstCh.}
LastKStrt : integer; {ignore words starting after this column.}
fnstr : path_name; {file name string}
{ *** used by bad word logic *** }
BadBase : {ARRAY [0..25] OF ^record} T560;
{$I MLIBPAS\TXTTOARR.PAS}
{Read all the bad words into one of 26 linked lists. This provides
a 'good enough for government work' sort to speed things up.}
PROCEDURE ReadBad; (*****************)
VAR
BadFile : text;
Str : s11;
i : integer;
s2 : string;
{Append S to the appropriate list.}
PROCEDURE AddStr(S:s11);
VAR ix:0..25; Temp:T269; Ptr:T269;
BEGIN {addstr}
ix := ORD(S[1]) - 97; {'a' becomes 0}
NEW(Ptr);
Ptr^.Link := NIL;
Ptr^.KeyWd := s;
{Assert: Ptr points at the new entry. Now add the new entry
to the linked list.}
Temp := BadBase[ix];
IF BadBase[ix] = NIL
THEN BadBase[ix] := Ptr {add to root}
ELSE {add at end of list}
BEGIN
Temp := BadBase[ix];
WHILE Temp^.link <> NIL DO
Temp := Temp^.link;
{Temp points at the last viable entry.}
Temp^.link := Ptr;
END;
END{addstr};
{Print all the 'b's in the list.} (* for test *)
PROCEDURE PrintList;
VAR Temp:T269;
BEGIN{printlist}
Temp := BadBase[1];
WHILE Temp <> NIL DO
BEGIN
WriteLn(Temp^.KeyWd);
Temp := Temp^.link;
END;
END{printlist};
BEGIN{readbad}
FOR i := 0 TO 25 DO
BadBase[i] := NIL;
fnstr := 'BADWDS.TXT';
GetFileName(' Choose words to be excluded ',fnstr);
RESET(BadFile,fnstr);
WHILE NOT EOF(BadFile) DO
BEGIN
ReadLn(BadFile,str);
AddStr(str);
END;
END{readbad};
PROCEDURE GetLength(VAR FinalCol:integer);
VAR junk,Choice:integer; ThisBox:dialog_ptr; S:string[255];
BEGIN
Choice := DO_ALERT(
'[0][Ignore rightmost columns| of input?][ Yes | No ]' ,2);
IF Choice = 1
THEN
BEGIN
Find_Dialog(LgthBox,ThisBox);
Center_Dialog(ThisBox);
junk := Do_Dialog(ThisBox,0);
Get_DEdit(ThisBox,LastCol,S);
READV(s,FinalCol);
FinalCol := FinalCol + 1; {fudge}
END
ELSE FinalCol := 999;
END{getlength};
PROCEDURE MakeList; (*****************)
VAR Wd:s11; Done,Bummer:boolean; Line:integer; StartIx,StartLine:long_integer;
FUNCTION FindStr(s:string):boolean;
VAR Find:boolean; Temp:T269; i,ix:integer;
BEGIN{findstr}
FOR i := 1 TO LENGTH(s) DO
s[i] := LowerCase(s[i]);
ix := ORD(s[1]) - 97;
Temp := BadBase[ix];
Find := FALSE;
WHILE (Temp <> NIL) AND (NOT Find) DO
IF Temp^.KeyWd = S
THEN Find := TRUE
ELSE Temp := Temp^.link;
FindStr := Find;
END{findstr};
PROCEDURE GetWord(VAR str:s11; VAR LineNumber:integer);
VAR Done : boolean;
PROCEDURE SaveFirst(ix:long_integer);
BEGIN
FirstCh[NextFirst] := ix;
NextFirst := NextFirst + 1;
StartLine := inix;
END{savefirst};
{Append characters to the string until the end of the word is found.}
PROCEDURE FindEnd;
BEGIN{findend}
WHILE NOT Done DO
IF Arr[inix] IN Letter
THEN BEGIN
(*Personal Pascal has a flaw in the CONCAT logic.
It won't detect a string overflow here so logic added
to do so.*)
IF LENGTH(str) < 11
THEN str := CONCAT(str,Arr[inix]);
inix := inix + 1;
END
ELSE IF Arr[inix] = #13
THEN BEGIN
LineNbr := LineNbr + 1;
Done := True;
inix := inix + 2;
SaveFirst(inix);
END
ELSE BEGIN {blank, comma, ...}
Done := TRUE;
inix := inix + 1;
END;
END{findend};
{Find the firat character of a word. Then pass control to
'FindEnd' to locate the last character of that word.}
BEGIN{getword}
{Find the first character and put it in the string.}
str := '';
LineNumber := LineNbr; {LineNbr may be advanced before return to here!}
StartIx := inix;
Done := FALSE;
REPEAT
IF (Arr[inix] IN Letter) AND (inix < NbrBytes)
AND ((inix - StartLine) < LastKStrt)
THEN
BEGIN
{First char in a word}
str := CONCAT(str,Arr[inix]);
inix := inix + 1;
FindEnd;
END
ELSE IF Arr[inix] = ' '
THEN inix := inix + 1
ELSE IF Arr[inix] = #13
THEN
BEGIN
LineNbr := LineNbr + 1;
Done := TRUE;
inix := inix + 2;
SaveFirst(inix);
END
ELSE
{Garbage character of some sort.}
inix := inix + 1;
UNTIL Done;
END{getword};
BEGIN{makelist}
Done := FALSE;
Letter := ['a'..'z','A'..'Z','0'..'9','''','-'];
StartLine := inix;
WHILE inix < NbrBytes DO
BEGIN
GetWord(Wd,Line);
IF (Wd[1] IN ['a'..'z','A'..'Z']) AND (LENGTH(Wd) > 0)
THEN Bummer := FindStr(Wd);
IF NOT Bummer AND (LENGTH(Wd) > 0)
THEN
BEGIN
KeyWdArr[NextIx] := Wd; {The key word just found}
TheLine[NextIx] := Line; {Line number in original text}
NextIx := NextIx + 1;
END;
END;
{Assert: NextIx - 1 key words were retained.}
WriteLn('list done');
END{makelist};
{$I MLIBPAS\QUCKSRT2.PAS}
{Get, from the user, the leftmost column that key words will be aligned to.}
PROCEDURE GetJustify(VAR n:integer);
VAR junk:integer; s:string[255]; ThisBox:dialog_ptr;
BEGIN
Find_Dialog(LeftBox,ThisBox);
Center_Dialog(ThisBox);
junk := Do_Dialog(ThisBox,0);
Get_DEdit(ThisBox,LftCol,S);
READV(s,n);
END{getjustify};
PROCEDURE DoOutput(LeftJustify:integer); (******************)
VAR junk:integer; ThisBox:dialog_ptr; MyStr:string; Choice:tree_index;
PROCEDURE UserDialog;
VAR Width,junk:integer;
BEGIN
DoDither;
Choice := Do_Alert(
'[0][Epson compatible printer?][ No | Yes ]',2);
IF Choice = 2
THEN
BEGIN
Write(Printer,#27,'@',#07); {Reset the printer, sound the bell.}
Width := DO_ALERT(
'[0][ Print width? ][ 80 col | 137 col ]' ,1 );
DoDither;
IF Width = 2
THEN Write(Printer,#15); {compressed mode on an Epson printer.}
Write(Printer,#27,'N',#6); { Skip 6 lines at perforations.}
END;
DoDither;
junk := DO_Alert(
'[0][Is the paper aligned?][ OK ]' ,1);
END{userdialog};
{Print the lines defined in array 'TheLine'.}
PROCEDURE PrintList;
LABEL 100;
VAR ThisLine : integer;
{Print the line specified by the parameter.}
PROCEDURE PrintLine(LineNbr:integer; KeyWd:string);
VAR s:s254; chix:long_integer; Offset,i:integer;
BEGIN
s := '';
chix := FirstCh[LineNbr];
REPEAT
IF LENGTH(S) < 253
THEN S := CONCAT(S,Arr[chix]);
chix := chix + 1;
UNTIL Arr[chix - 1] = #10; {line feed}
{Assert: S is the line to print. Align so KeyWd starts at
column 'LeftJustify'.}
Offset := POS(KeyWd,S);
{If Offset is >= LeftJustify, the DO will not execute and the datum will
be left justified to column 1.}
FOR i := 1 TO LeftJustify - Offset DO
Write(' ');
Write(s);
END{printline};
BEGIN{printlist}
{Assert: 'TheLine' is a sorted list of lines in the original file to
print. The same line number can appear several times in the
array, once for each key word to be printed.}
FOR ThisLine := 1 TO NextIx-1 DO
BEGIN
IF NOT KeyPressed
THEN PrintLine(TheLine[ThisLine],KeyWdArr[ThisLine])
ELSE
BEGIN
junk := DO_ALERT('[0][Outpt aborted][ OK ]',1);
GOTO 100;
END;
END{do loop};
100:
END{printlist};
BEGIN{dooutput}
Find_Dialog(OutBox,ThisBox);
Center_Dialog(ThisBox);
Choice := Do_Dialog(ThisBox,0);
DrainKbd;
CASE Choice OF {B stands for button}
BMonitor : BEGIN
(*Write(#27,'E'); {clear screen}*)
DoDither;
junk :=
DO_ALERT('[0][Use Control S and Control Q| to suspend monitor][ OK ]',1);
REWRITE(OUTPUT,'CON:');
HIDE_MOUSE;
Write(#27,'E'); {Clear the screen.}
DrainKbd;
PrintList;
Write('... Press return');
ReadLn;
DoDither;
SHOW_MOUSE;
END;
BFile : BEGIN
MyStr := 'KWIC.OUT';
GetFileName(' Choose output file ',MyStr);
REWRITE(OUTPUT,MyStr);
PrintList;
CLOSE(OUTPUT);
END{file write};
BPrntr : BEGIN
UserDialog;
REWRITE(OUTPUT,'PRN:');
PrintList;
END;
BExit : HALT; {Normal exit for program.}
END {case};
OBJ_SETSTATE(ThisBox,Choice,Normal,TRUE);
END{dooutput};
BEGIN{doit} (*******************)
junki := DO_ALERT(
'[0][Key Words in Context| Version 1.0| by Merlin L. Hanson][ OK ]',1) ;
fnstr := 'KWICINPT.TXT';
GetFileName('Choose Input File',fnstr);
ReadFile(fnstr,Arr,NbrBytes,FALSE);
inix := 1; {'point at' the first character}
ReadBad;
DoDither; {Erase .PRG produced file prompts}
GetLength(LastKStrt);
FirstCh[1] := 1;
NextFirst := 2;
LineNbr := 1;
NextIx := 1;
Write(#27,'E',#10); {Clear screen, line feed.}
WriteLn('Computing...');
WriteLn('Make a list');
MakeList;
WriteLn('Number KWIC entries:',nextix-1);
{Assert: A 2-tuple has been created for each key word.
KeyWdArr contains a list of the key words (The same key word can recur many times.)
TheLine contains the line number in the original text
Also:
FirstCh is the offset in 'Arr' of the _first_ character of each _line_
in the input file.
Note that a sort is _not_ wanted on FirstCh.}
WriteLn('Start sort ', NextIx-1,' items');
QuickSort(KeyWdArr,NextIx-1);
WriteLn('Sort done');
GetJustify(Leftmost);
REPEAT
DoDither;
DoOutput(Leftmost); {contains an end of program exit}
UNTIL FALSE;
END{doit};
BEGIN {program}
REWRITE(Printer,'PRN:');
RSC_Name := 'KWIC.RSC';
IF INIT_GEM >= 0
THEN
IF LOAD_RESOURCE(RSC_Name)
THEN
BEGIN
DoIt;
EXIT_GEM;
END
ELSE Abort('Can''t find .RSC file');
END{program}.
(*** OVERALL STRUCTURE ***
Line numbers are approximate
7 PROGRAM KWIC;
8 {$I GEMSUBS.PAS}
9 {$I AUXSUBS.PAS}
10 {$I CURRENT\KWIC.I}
41 PROCEDURE Abort(s:string);
51 FUNCTION KeyPressed:boolean;
57 PROCEDURE Pause;
65 PROCEDURE DrainKbd;
75 PROCEDURE GetFileName(Note:string; VAR s:path_name);
78 |PROCEDURE GetDriveAndPath(VAR S:string);
87 | |FUNCTION CurrentDisk:integer;
91 | |PROCEDURE GetDir(VAR Ptr:string; DriveID:integer);
96 | |BEGIN {getdriveandpath}
111 |BEGIN{getfilename}
124 FUNCTION LowerCase(ch:char):char;
131 PROCEDURE DoDither;
133 |FUNCTION GetRez:integer;
135 |BEGIN{dodither}
148 PROCEDURE DoIt; (******************)
192 {$I MLIBPAS\TXTTOARR.PAS}
196 |PROCEDURE ReadBad; (*****************)
204 | |PROCEDURE AddStr(S:s11);
227 | |PROCEDURE PrintList;
238 | |BEGIN{readbad}
251 |PROCEDURE GetLength(VAR FinalCol:integer);
269 |PROCEDURE MakeList; (*****************)
272 | |FUNCTION FindStr(s:string):boolean;
274 | | |BEGIN{findstr}
287 | |PROCEDURE GetWord(VAR str:s11; VAR LineNumber:integer);
290 | | |PROCEDURE SaveFirst(ix:long_integer);
298 | | |PROCEDURE FindEnd;
299 | | | |BEGIN{findend}
325 | | |BEGIN{getword}
357 | |BEGIN{makelist}
378 |{$I MLIBPAS\QUCKSRT2.PAS}
381 |PROCEDURE GetJustify(VAR n:integer);
391 |PROCEDURE DoOutput(LeftJustify:integer); (******************)
394 | |PROCEDURE UserDialog;
417 | |PROCEDURE PrintList;
422 | | |PROCEDURE PrintLine(LineNbr:integer; KeyWd:string);
442 | | |BEGIN{printlist}
459 | |BEGIN{dooutput}
496 |BEGIN{doit} (****************)
532 BEGIN {program} *)